## Covid-19 Impact on Airlines Industry ##

Businesses across industries all over the world are facing crisis due to the global COVID-19 pandemic and airlines industries also facing the same fate due to the pandemic.There is some improvement in last few months but how much is the impact and what time it will take to come into the previous situation, to analyze this i will use the last 10 years passenger, revenue, load and other datas, and will see there trend and behavior, for this i will use some time series plot to analyze the trend, seasonality and level of the values and in the end i will predict for the next 2 years data by using different methods like ARIMA, Additive Seasonality, Exponential Seasonality etc.

#Load the require library.

library(readxl)
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2   3.3.3     ✓ fma       2.4  
## ✓ forecast  8.14      ✓ expsmooth 2.3
## 
library(forecast)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(dygraphs)
library(xts)
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(latticeExtra)
## Loading required package: lattice
## 
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
## 
##     layer
library(TSstudio)
library(Metrics)
## 
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
## 
##     accuracy
#Us city passenger data for both international and domestic flights.
# Load the dataset.
Passenger_data <- read_excel("Passengers_3_19_2021.xlsx",skip = 1) 
# Print First 6 rows of the dataset.
head(Passenger_data) 
#TO check the Class pf the attributes present in the dataset.
str(Passenger_data) 
## tibble [242 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Year         : chr [1:242] "2002" "2002" "2002" "2002" ...
##  $ Month        : chr [1:242] "10" "11" "12" "TOTAL" ...
##  $ DOMESTIC     : num [1:242] 4.81e+07 4.49e+07 4.97e+07 5.52e+08 4.30e+07 ...
##  $ INTERNATIONAL: num [1:242] 9.58e+06 9.02e+06 1.00e+07 1.19e+08 9.73e+06 ...
##  $ TOTAL        : num [1:242] 5.76e+07 5.39e+07 5.97e+07 6.71e+08 5.28e+07 ...
anyNA(Passenger_data) # True
## [1] TRUE
dim(Passenger_data) # 242 Rows 5 Columns.
## [1] 242   5
#Remove the rows containing total values an nul values in the end because these point's will not help me in further analysis.

Passenger_data2 <- Passenger_data[-c(4,17,30,43,56,69,82,95,108,121,134,147,160,173,186,
                                    199,212,225,238:242),]

dim(Passenger_data2)
## [1] 219   5
#The new dataset has 219 rows means the 219 months data of the total passengers traveling and 5 columns. 

#Now my dataset has different columns for month and year, but for making this data frame as a time series variable I need a date column which will be the combination of both these columns so, I simply paste the month and year column into a single attribute and then transform it into the date column.
Passenger_data2$Date <- as.Date(paste(month.abb[as.numeric(Passenger_data2$Month)], "01",
                                     Passenger_data2$Year, sep="-"),format = "%b-%d-%Y")

#Next I transform my whole data into the time series to see if the trends of the international and domestic passengers are the same throughout the years or both showing different behavior.
#Transform the whole data into time series.
Passenger_Data_xts <- xts( x=Passenger_data2[,c(3:5)], order.by = Passenger_data2$Date)

ts_plot(Passenger_Data_xts)
cor(Passenger_data2$DOMESTIC,Passenger_data2$INTERNATIONAL)
## [1] 0.9041159
#By looking at the plot and correlation value I can say that both the domestic and international passenger count was highly correlated so I don't need to consider both these variables for my time series analysis if I want to do the further analysis with total passenger count. Here one more thing I want to add that as the covid-19 impact was not in a regular cycle so a model will not able to predict the actual 2020 passenger count, because of which the error will be high because the model was not able to identify the behavior of the data within this range and ultimately considered it as the outlier. so what I will do here I will predict for 2020 and will see how much impact in total it make due to covid-19.

ARIMA

#As I am dealing with the months series so I need to create a dummy variable for 12 months. I will take 228 rows because my data points start from October so I will drop the first 9 rows from the month column. 

Month <- data.frame(outer(rep(month.abb,length=228),month.abb,"==")+0)
Month <- Month[10:228,]
head(Month)
#Assign Column names as month instead of variable name for better understanding.
colnames(Month) <- month.abb
head(Month)
#Combine the passenger data with the month data. 
Passenger_data3 <- data.frame(Passenger_data2[,c(6,5)],Month)
head(Passenger_data3)
#now i will build few columns into the passenger data to analyze the additive and multiplicative seasonality of my dataset.
Passenger_data3["t"] <-1:219 #For linear impact. 
Passenger_data3["t_squared"] <- Passenger_data3["t"]*Passenger_data3["t"] #For quadratic impact

Passenger_data3["log_Passengers"] <- log(Passenger_data3["TOTAL"]) # for multiplicative impact

train_set <- Passenger_data3[1:183,]
test_set <- Passenger_data3[184:207,]


# Model Based Forecasting Analysis  
attach(train_set)

# Multiplicative Seasonality 
Multi_Season <- lm(log_Passengers ~ Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Multi_Season)
## 
## Call:
## lm(formula = log_Passengers ~ Jan + Feb + Mar + Apr + May + Jun + 
##     Jul + Aug + Sep + Oct + Nov + Dec, data = train_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20684 -0.04749 -0.00781  0.03422  0.19812 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.01764    0.02119 850.356  < 2e-16 ***
## Jan         -0.09328    0.03046  -3.062  0.00255 ** 
## Feb         -0.14866    0.03046  -4.880 2.41e-06 ***
## Mar          0.06074    0.03046   1.994  0.04774 *  
## Apr          0.01825    0.03046   0.599  0.54980    
## May          0.05625    0.03046   1.847  0.06653 .  
## Jun          0.10432    0.03046   3.425  0.00077 ***
## Jul          0.15536    0.03046   5.100 8.94e-07 ***
## Aug          0.12243    0.03046   4.019 8.73e-05 ***
## Sep         -0.04118    0.03046  -1.352  0.17823    
## Oct          0.01825    0.02996   0.609  0.54338    
## Nov         -0.03857    0.02996  -1.287  0.19974    
## Dec               NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08475 on 171 degrees of freedom
## Multiple R-squared:  0.5178, Adjusted R-squared:  0.4868 
## F-statistic: 16.69 on 11 and 171 DF,  p-value: < 2.2e-16
#Comput the model on test data set and predict the value 
Multi_Season_pred <- data.frame(predict(Multi_Season,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Multi_Season, newdata = test_set, interval = "predict"):
## prediction from a rank-deficient fit may be misleading
#As my prediction contains logrithmic function within it i need to transform it
Multi_Season_pred1 <- exp(Multi_Season_pred$fit)

#Calculate Mean Absolute Percentage Error
mape(Multi_Season_pred1,test_set$TOTAL) #0.26
## [1] 0.2605652
############# Multiplicative Seasonality with linear data ###########
Multi_Season_linear <- lm(log_Passengers~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Multi_Season_linear)
## 
## Call:
## lm(formula = log_Passengers ~ t + Jan + Feb + Mar + Apr + May + 
##     Jun + Jul + Aug + Sep + Oct + Nov + Dec, data = train_set)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.099405 -0.039952 -0.008208  0.041053  0.101059 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  1.790e+01  1.358e-02 1318.204  < 2e-16 ***
## t            1.279e-03  6.746e-05   18.959  < 2e-16 ***
## Jan         -8.688e-02  1.731e-02   -5.018 1.31e-06 ***
## Feb         -1.435e-01  1.731e-02   -8.291 3.27e-14 ***
## Mar          6.457e-02  1.731e-02    3.730 0.000261 ***
## Apr          2.081e-02  1.731e-02    1.202 0.230971    
## May          5.753e-02  1.731e-02    3.323 0.001090 ** 
## Jun          1.043e-01  1.731e-02    6.026 1.01e-08 ***
## Jul          1.541e-01  1.731e-02    8.900 8.10e-16 ***
## Aug          1.199e-01  1.731e-02    6.924 8.60e-11 ***
## Sep         -4.501e-02  1.731e-02   -2.600 0.010141 *  
## Oct          2.080e-02  1.703e-02    1.222 0.223536    
## Nov         -3.729e-02  1.703e-02   -2.190 0.029890 *  
## Dec                 NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04817 on 170 degrees of freedom
## Multiple R-squared:  0.8452, Adjusted R-squared:  0.8342 
## F-statistic: 77.33 on 12 and 170 DF,  p-value: < 2.2e-16
#Comput the model on test data set and predict the value 
Multi_Season_linear_pred <- data.frame(predict(Multi_Season_linear,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Multi_Season_linear, newdata = test_set, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
#As my prediction contains logrithmic function within it we need to transform it
Multi_Season_pred2 <- exp(Multi_Season_linear_pred$fit)


#Calculate Mean Absolute Percentage Error
mape(test_set$TOTAL,Multi_Season_pred2) #MAPE=0.09
## [1] 0.09396467
############## Additive Seasonality ################

Add_season <- lm(TOTAL ~ Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Add_season)
## 
## Call:
## lm(formula = TOTAL ~ Jan + Feb + Mar + Apr + May + Jun + Jul + 
##     Aug + Sep + Oct + Nov + Dec, data = train_set)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -13013400  -3286729   -771218   2248384  13884905 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 67061697    1462126  45.866  < 2e-16 ***
## Jan         -5995821    2101938  -2.853 0.004873 ** 
## Feb         -9307709    2101938  -4.428 1.69e-05 ***
## Mar          4153052    2101938   1.976 0.049784 *  
## Apr          1216339    2101938   0.579 0.563570    
## May          3905928    2101938   1.858 0.064852 .  
## Jun          7344520    2101938   3.494 0.000606 ***
## Jul         11204047    2101938   5.330 3.06e-07 ***
## Aug          8677200    2101938   4.128 5.70e-05 ***
## Sep         -2666768    2101938  -1.269 0.206266    
## Oct          1283039    2067758   0.620 0.535756    
## Nov         -2474263    2067758  -1.197 0.233122    
## Dec               NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5849000 on 171 degrees of freedom
## Multiple R-squared:  0.5078, Adjusted R-squared:  0.4761 
## F-statistic: 16.04 on 11 and 171 DF,  p-value: < 2.2e-16
#Comput the model on test data set and predict the value 
Add_season_pred <- data.frame(predict(Add_season,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Add_season, newdata = test_set, interval = "predict"):
## prediction from a rank-deficient fit may be misleading
#Calculate Mean Absolute Percentage Error
mape(test_set$TOTAL,Add_season_pred$fit) #MAPE=0.20
## [1] 0.2035171
############## Additive Seasonality with linear model ################

Add_season_linear <- lm(TOTAL~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Add_season_linear)
## 
## Call:
## lm(formula = TOTAL ~ t + Jan + Feb + Mar + Apr + May + Jun + 
##     Jul + Aug + Sep + Oct + Nov + Dec, data = train_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5595370 -2885119  -613655  2986714  6161212 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 58848878     935856  62.882  < 2e-16 ***
## t              88310       4650  18.992  < 2e-16 ***
## Jan         -5554272    1193367  -4.654 6.52e-06 ***
## Feb         -8954470    1193285  -7.504 3.30e-12 ***
## Mar          4417981    1193222   3.703 0.000288 ***
## Apr          1392959    1193177   1.167 0.244669    
## May          3994238    1193150   3.348 0.001004 ** 
## Jun          7344520    1193140   6.156 5.21e-09 ***
## Jul         11115737    1193150   9.316  < 2e-16 ***
## Aug          8500580    1193177   7.124 2.84e-11 ***
## Sep         -2931697    1193222  -2.457 0.015016 *  
## Oct          1459659    1173775   1.244 0.215374    
## Nov         -2385953    1173748  -2.033 0.043632 *  
## Dec               NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3320000 on 170 degrees of freedom
## Multiple R-squared:  0.8423, Adjusted R-squared:  0.8312 
## F-statistic: 75.69 on 12 and 170 DF,  p-value: < 2.2e-16
#Comput the model on test data set and predict the value 
Add_season_linear_pred <- data.frame(predict(Add_season_linear,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Add_season_linear, newdata = test_set, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
#Calculate Mean Absolute Percentage Error
mape(test_set$TOTAL,Add_season_linear_pred$fit) #MAPE=0.09
## [1] 0.09655188
# Additive Seasonality with quadratic model

Add_season_quad <- lm(TOTAL ~ t_squared + Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train_set)
summary(Add_season_quad)
## 
## Call:
## lm(formula = TOTAL ~ t_squared + Jan + Feb + Mar + Apr + May + 
##     Jun + Jul + Aug + Sep + Oct + Nov + Dec, data = train_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7682474 -2025930  -657654  2319984  7112758 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.143e+07  8.096e+05  75.880  < 2e-16 ***
## t_squared    4.810e+02  2.257e+01  21.315  < 2e-16 ***
## Jan         -5.382e+06  1.100e+06  -4.890 2.32e-06 ***
## Feb         -8.779e+06  1.100e+06  -7.978 2.10e-13 ***
## Mar          4.596e+06  1.100e+06   4.177 4.71e-05 ***
## Apr          1.572e+06  1.100e+06   1.429 0.154801    
## May          4.174e+06  1.100e+06   3.794 0.000206 ***
## Jun          7.523e+06  1.100e+06   6.839 1.38e-10 ***
## Jul          1.129e+07  1.100e+06  10.266  < 2e-16 ***
## Aug          8.675e+06  1.100e+06   7.886 3.60e-13 ***
## Sep         -2.761e+06  1.100e+06  -2.509 0.013027 *  
## Oct          1.460e+06  1.082e+06   1.349 0.179081    
## Nov         -2.385e+06  1.082e+06  -2.204 0.028858 *  
## Dec                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3061000 on 170 degrees of freedom
## Multiple R-squared:  0.866,  Adjusted R-squared:  0.8565 
## F-statistic: 91.54 on 12 and 170 DF,  p-value: < 2.2e-16
#Comput the model on test data set and predict the value 
Add_season_quad_pred <- data.frame(predict(Add_season_quad,newdata = test_set,interval = "predict"))
## Warning in predict.lm(Add_season_quad, newdata = test_set, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
Add_season_quad_pred
#Calculate Mean Absolute Percentage Error
mape(test_set$TOTAL,Add_season_quad_pred$fit) #MAPE=0.05
## [1] 0.05164158
#As I got mape only 0.05%  in the case of additive seasonality with the quadratic model so I will forecast my 2020 passenger values based on this model for that I will build my final model by using his method 
Final_model <- lm(TOTAL ~ t_squared + Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,
                  data = Passenger_data3[1:207,])
summary(Final_model)
## 
## Call:
## lm(formula = TOTAL ~ t_squared + Jan + Feb + Mar + Apr + May + 
##     Jun + Jul + Aug + Sep + Oct + Nov + Dec, data = Passenger_data3[1:207, 
##     ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7321102 -2242522  -561539  2726423  7775883 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.087e+07  7.837e+05  77.669  < 2e-16 ***
## t_squared    5.596e+02  1.705e+01  32.823  < 2e-16 ***
## Jan         -5.668e+06  1.064e+06  -5.327 2.76e-07 ***
## Feb         -9.214e+06  1.064e+06  -8.660 1.82e-15 ***
## Mar          4.616e+06  1.064e+06   4.339 2.30e-05 ***
## Apr          1.542e+06  1.064e+06   1.449   0.1489    
## May          4.370e+06  1.064e+06   4.108 5.89e-05 ***
## Jun          7.730e+06  1.064e+06   7.266 8.79e-12 ***
## Jul          1.147e+07  1.064e+06  10.778  < 2e-16 ***
## Aug          8.748e+06  1.064e+06   8.224 2.79e-14 ***
## Sep         -3.021e+06  1.064e+06  -2.839   0.0050 ** 
## Oct          1.488e+06  1.048e+06   1.419   0.1574    
## Nov         -2.535e+06  1.048e+06  -2.418   0.0165 *  
## Dec                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3145000 on 194 degrees of freedom
## Multiple R-squared:  0.9036, Adjusted R-squared:  0.8977 
## F-statistic: 151.6 on 12 and 194 DF,  p-value: < 2.2e-16
#Comput the model on test data set and predict the value 
Final_model_pred <- data.frame(predict(Final_model,newdata = Passenger_data3[207:219,],interval = "predict"))
## Warning in predict.lm(Final_model, newdata = Passenger_data3[207:219, ], :
## prediction from a rank-deficient fit may be misleading
#As i got my final forecasted values now i need to calculate residual errors too

resid <- Final_model$residuals
acf(resid, lag.max = 12)

#most of my error values are insignificant so i need to manage it
resid_a <- auto.arima(resid)
resid_a$residuals
## Time Series:
## Start = 1 
## End = 207 
## Frequency = 1 
##   [1]    -4724.963   232541.193  3422318.346    95939.115  -241278.838
##   [6] -3685617.915 -2690455.974  -775722.208  1115246.048  1724439.453
##  [11]  1444921.690   110838.361  1257734.304  1560959.987  1623760.699
##  [16]   -36576.279  3871018.159 -1646275.475   671707.776 -2142933.103
##  [21]   559434.023   902990.784   296821.535  -527566.945  1913229.924
##  [26]  1904824.446  -166632.436   872218.072  1623997.462   949565.549
##  [31] -1640691.376  -570355.896    21226.058   175837.497 -1249702.434
##  [36]   289775.575 -1305917.458  1817079.277  -305569.828  1123857.325
##  [41]  1056922.142  -193759.261   322869.640 -1375506.575  -824978.937
##  [46] -1131571.445 -1043490.600   923047.899   572989.923  2497058.831
##  [51]  -718634.843  1102311.213   459396.456   177369.939   812311.281
##  [56]  -520862.530  -347448.297  -561465.406  1290051.318  -822935.947
##  [61]  -191390.003  1552675.248 -2655950.378   885472.773  3091179.783
##  [66] -1281807.242 -2741202.385  -642534.141 -1071267.078 -1389780.472
##  [71]  -329126.541 -3192065.046  -463554.666 -1721573.983   359917.667
##  [76]   185188.842   565892.838 -1698211.638  1010420.994 -2316292.342
##  [81]  -344018.928  1805680.842   262478.844    28113.132  -255568.283
##  [86]  -706644.856  -107079.185  1059475.980  -885816.447   109693.595
##  [91]   405799.889  -520869.764   160251.883   391228.997   237216.351
##  [96]  1936923.466   969677.296  -598917.304 -2019002.193  -201209.640
## [101]  -749820.532   195509.743   453844.606   687336.238  -553927.425
## [106]   163812.866  -991761.149  2210646.940  -405078.373  -181849.703
## [111] -1248783.928   166329.998  1550059.202  -958954.318  -559357.838
## [116]  -617297.340  -297549.603  -726973.339   897679.733   782025.612
## [121] -1241803.763   741076.933 -1494635.582  1034860.837   331773.390
## [126]  -123810.838 -1296581.495   874953.838  -460957.319 -1615065.955
## [131]   762531.210  1490183.865  -307338.110 -1661690.383  2459285.614
## [136]  -123572.762 -1770429.981   828931.422   825647.726   667450.427
## [141]  -895114.730  -434000.115   545059.804   814389.336   135972.234
## [146] -1934297.405  1487397.822   321995.860 -1525227.755   843942.550
## [151]  1293536.434   982401.173  -597617.353  1065754.375   462402.083
## [156]   993625.898   717367.997 -1091208.093  -491236.141  -292949.083
## [161]  -181454.954  -601586.826  -832153.536  1857842.263   875436.726
## [166]  -470235.081 -1102514.922  2828363.147  -538221.319  -571034.933
## [171]  -436204.837  -307148.613 -2516592.812  1029205.279  1601888.019
## [176]   873814.170   782667.686   152611.476   -97297.193 -2557734.075
## [181]  1875453.064  1155031.249 -1229760.064 -1190950.002  -891111.337
## [186]  2262686.426  1219645.536  1392190.016  1316016.077   376268.659
## [191]  -516807.043 -3400766.729  1327013.313  1156557.129 -1696617.774
## [196] -1744087.225 -1893428.828  3895093.389  1142770.705  2146385.775
## [201]   562601.168  -429533.136  -868462.686 -1906864.489   664400.099
## [206] -2192843.259  3315856.472
acf(resid_a$residuals, lag.max = 12)

#i got the required values now we can forecast my next 12 months errors.

errors <- forecast(resid_a, h = 12,level = c(95,99))

future_errors <- data.frame(errors)

future_errors <- future_errors$Point.Forecast

# predicted values for new data + future error values 

predicted_new_values <- Final_model_pred + future_errors

#Transform the data set into time series
new_data <- ts(predicted_new_values$fit,start = c(2020,1),frequency = 12)

Passenger_ts <- ts(Passenger_data2$TOTAL,frequency = 12,start = c(2002,10))
#visualization of foretasted data with the original data
ts.plot(Passenger_ts,new_data,lty=c(1,3))

#here I can see the difference between the predicted and actual is too high, and that's what I discuss earlier, that the model run based on the past trend and seasonality but if I compute the actual 2020 data it's not able to detect the pattern and will forecast in the negative trend.

Apart from passenger count, there are other factors too that impact due to covid-19 like share price of airlines, load capacity of flights, operating revenue, In my further analysis I use some plot to explain the covid-19 impact on these factors

#Seat Per miles data
Seat_miles <- read_excel("ASM_3_19_2021.xlsx", skip = 1)
head(Seat_miles)
Seat_miles <- Seat_miles[-c(4,17,30,43,56,69,82,95,108,121,134,147,160,173,186,199,212,225,238:242),]


Seat_miles$Date <- as.Date(paste(month.abb[as.numeric(Seat_miles$Month)], "01",
                                     Seat_miles$Year, sep="-"),format = "%b-%d-%Y")

#Transform the total data into time series.
Seat_miles_ts <- ts(Seat_miles$TOTAL,frequency = 12,start = c(2002,10))
Seat_miles_ts
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2002                                                                      
## 2003  99317997  87516250  99856670  92157440  93219540  97606466 104101068
## 2004 101731051  95910200 105154882 103440891 106797409 108748141 114350123
## 2005 106280547  97551813 110949520 107923815 112451405 113379629 119035642
## 2006 107123822  96524724 111307830 109033553 112604130 113970339 119423638
## 2007 110771884  99268003 114023483 111756132 115983557 117414438 123429196
## 2008 113938958 106676924 117077397 114418521 118412173 120159608 124570530
## 2009 105344938  95026030 108554895 106758880 109132438 112007956 118349963
## 2010 104357787  91689830 107827035 104265530 110840289 113921176 120855956
## 2011 107104256  96466295 113722053 111553990 114750873 118515657 125559166
## 2012 108291408 101554314 115120344 112323388 116747958 120214447 126527895
## 2013 110149774  99882887 117068914 115512012 121080671 124263270 130603381
## 2014 113094340 101472246 122208312 120303316 126050429 130597006 137900635
## 2015 120526397 107234117 128040048 128176918 134949901 138434941 147507867
## 2016 128071084 120304405 136780778 135107343 142856481 147275917 155012382
## 2017 136209324 121796024 142922750 143081846 149591202 154927014 163827137
## 2018 140739315 127539376 149200935 150287515 158579088 162978997 170698950
## 2019 147172573 131971576 155658262 155535987 162312621 165327547 172564923
## 2020 150776705 134693243 112498114  25835632  23679036  32485474  54192617
##            Aug       Sep       Oct       Nov       Dec
## 2002                     101015447  95524045 100254985
## 2003 104784188  97482939 100674203  96199891 101450217
## 2004 114666296 103588044 107552592 102505001 108188265
## 2005 117906661 107360330 108605023 103390483 108801114
## 2006 119090347 111692210 111397911 105630403 111289117
## 2007 123685645 113331427 115494069 110566860 115643225
## 2008 122872823 107867988 109057437 102123723 106607657
## 2009 116763700 102773665 103963092  98466852 104355417
## 2010 118622738 107487430 110039961 103472594 107639793
## 2011 121611950 111190401 112860499 104460187 111066357
## 2012 124442390 111862198 111322080 106592379 111807196
## 2013 129164109 116005052 118590046 111007511 116734165
## 2014 135825279 121639742 124954189 116269356 123839664
## 2015 145096612 130303931 132920025 123798157 132156240
## 2016 152978614 138094644 139350272 129712133 139245758
## 2017 161093752 141641683 146632681 134733444 144194938
## 2018 167836616 151886924 154624685 142881112 151590645
## 2019 171387256 154344286 157136615 145141865 155724954
## 2020  60185199  54131843  60940774  64989269  69731148
dygraph(Seat_miles_ts) %>%
  dyOptions( drawPoints = TRUE, pointSize = 4 )
#Numbers of flight operating data
Flights_Data <- read_excel("Flights_3_19_2021.xlsx", skip = 1)
head(Flights_Data)
Flights_Data <- Flights_Data[-c(4,17,30,43,56,69,82,95,108,121,134,147,160,173,186,199,212,225,238:242),]


Flights_Data$Date <- as.Date(paste(month.abb[as.numeric(Flights_Data$Month)], "01",
                                     Flights_Data$Year, sep="-"),format = "%b-%d-%Y")

#Transform the total data into time series.
Flights_Data_xts <- xts( x=Flights_Data[,-c(1,2,6,5)], order.by=Flights_Data$Date)


dygraph(Flights_Data_xts)
RPM_Data <- read_excel("RPM_3_19_2021.xlsx", skip = 1)
head(RPM_Data)
RPM_Data <- RPM_Data[-c(4,17,30,43,56,69,82,95,108,121,134,147,160,173,186,199,212,225,238:242),]

RPM_Data$Date <- as.Date(paste(month.abb[as.numeric(RPM_Data$Month)], "01",
                                     RPM_Data$Year, sep="-"),format = "%b-%d-%Y")

#Transform the total data into time series.
rpm_Data_xts <- xts( x=RPM_Data[,-c(1,2,6,5)], order.by=RPM_Data$Date)

ts_plot(rpm_Data_xts)
#revenue from different zone.
Operating_Rev_Data <- read_excel("Operating_Rev_3_19_2021.xlsx",skip = 1)
head(Operating_Rev_Data)
str(Operating_Rev_Data)
## tibble [106 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Year         : chr [1:106] "2000" "2000" "2000" "2000" ...
##  $ Quarter      : chr [1:106] "1" "2" "3" "4" ...
##  $ DOMESTIC     : num [1:106] 23262183 25573067 25313087 24751473 98899811 ...
##  $ LATIN AMERICA: num [1:106] 1639679 1626743 1776933 1761932 6805287 ...
##  $ ATLANTIC     : num [1:106] 2690229 3419032 3796968 3113734 13019963 ...
##  $ PACIFIC      : num [1:106] 2240370 2432840 2801208 2545414 10019832 ...
##  $ INTERNATIONAL: num [1:106] 370524 390007 370491 372305 1503327 ...
##  $ TOTAL        : num [1:106] 3.02e+07 3.34e+07 3.41e+07 3.25e+07 1.30e+08 ...
Operating_Rev_Data <- Operating_Rev_Data[-c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,
                                          80,85,90,95,100,104:106),]

Operating_Rev_Data$Quarter <- as.numeric(Operating_Rev_Data$Quarter)
Operating_Rev_Data$Year <- as.numeric(Operating_Rev_Data$Year)

attach(Operating_Rev_Data)
## The following object is masked from train_set:
## 
##     TOTAL
Operating_Rev_Data_ts <- with(Operating_Rev_Data, xts(cbind(DOMESTIC,`LATIN AMERICA`,
                                                          ATLANTIC,PACIFIC,INTERNATIONAL),
                                                    as.yearqtr(Year + (Quarter-1)/4)))

dygraph(Operating_Rev_Data_ts) %>%
  dyOptions( fillGraph=TRUE )
dygraph(Operating_Rev_Data_ts) %>%
  dyOptions( stepPlot=TRUE, fillGraph=TRUE)
dygraph(Operating_Rev_Data_ts) %>%
  dyOptions(stemPlot=TRUE)
ts_plot(Operating_Rev_Data_ts)
Operating_PL_Data <- read_excel("Operating_PL_3_19_2021.xlsx", skip = 1)
head(Operating_PL_Data)
str(Operating_PL_Data)
## tibble [106 × 8] (S3: tbl_df/tbl/data.frame)
##  $ Year         : chr [1:106] "2000" "2000" "2000" "2000" ...
##  $ Quarter      : chr [1:106] "1" "2" "3" "4" ...
##  $ DOMESTIC     : num [1:106] 139162 1570500 548903 -267068 1991496 ...
##  $ LATIN AMERICA: num [1:106] 58927 -86040 33056 6639 12581 ...
##  $ ATLANTIC     : num [1:106] -68918 250371 226224 -95916 311761 ...
##  $ PACIFIC      : num [1:106] -106308 154724 83675 5533 137623 ...
##  $ INTERNATIONAL: num [1:106] 839 9541 45470 23388 79238 ...
##  $ TOTAL        : num [1:106] 23702 1899096 937327 -327425 2532700 ...
Operating_PL_Data <- Operating_PL_Data[-c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,
                                          80,85,90,95,100,104:106),]

Operating_PL_Data$Quarter <- as.numeric(Operating_PL_Data$Quarter)
Operating_PL_Data$Year <- as.numeric(Operating_PL_Data$Year)

attach(Operating_PL_Data)
## The following objects are masked from Operating_Rev_Data:
## 
##     ATLANTIC, DOMESTIC, INTERNATIONAL, LATIN AMERICA, PACIFIC, Quarter,
##     TOTAL, Year
## The following object is masked from train_set:
## 
##     TOTAL
Operating_PL_Data_ts <- with(Operating_PL_Data, xts(cbind(DOMESTIC,`LATIN AMERICA`,
                                                          ATLANTIC,PACIFIC,INTERNATIONAL),
                                                    as.yearqtr(Year + (Quarter-1)/4)))

ts_plot(Operating_PL_Data_ts)
#Share Price of top Airlines company. 
United_Airlines <- read.csv("UAL.csv",header = TRUE)
American_Airlines <- read.csv("AAL.csv",header = TRUE)
Delta_Airlines <- read.csv("DAL.csv",header = TRUE)
southwest_Airlines <- read.csv("LUV.csv",header = TRUE)
Lufthansa_Airlines <- read.csv("LHA.DE.csv",header = TRUE)

Airlines_Shares <- as.data.frame(cbind(United_Airlines[-133,1:2],southwest_Airlines[-133,2],
                                       Delta_Airlines[-133,2],American_Airlines[-133,2],
                                       Lufthansa_Airlines[,2]))

head(Airlines_Shares)
colnames(Airlines_Shares) <- c("Date","United_Airlines","Southwest_Airlines","Delta_Airlines",
                               "American_Airlines","Lufthansa_Airlines")

Airlines_Shares$Date <- as.Date(Airlines_Shares$Date)
Shares_xts <- xts( x=Airlines_Shares[,-1], order.by=Airlines_Shares$Date)

ts_plot(Shares_xts)